* Settings
* Pin the Stata version first so the rest of the script is interpreted under version 16
version 16
* Project root directory; every file path below is built from this global
global SSDIMed "/disk/agedisk4/medicare.work/miller-DUA50377/proj_ssdi"
* Run the shared project settings script (its contents are not shown in this file)
do "$SSDIMed/scripts/_auxiliary/_project_settings.do"

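* -----------------------------------------------------------------------------------------------
* Build dataset on economic conditions and population in the years of SSDI filing
* (yearly version: filing dates and Medicare coverage start months are aggregated to calendar years)
* Inputs:
*   BLS - LAUS unemployment by county and year (LAUS_ctyXyr.dta)
*   ACS - Employed and unemployed counts by county and year (ACS_ctyXyr.dta), used in Step 3
*   Census - CDC Wonder population counts
*   SSA DAF PUF - Counts of SSDI beneficiaries, by [filing year] x [Medicare eligibility year]
* Output:
*   Unemployment and population in years of SSDI filing, by county and SSDI Medicare eligibility year
*     - unemp_pop_atappyearly.dta     (LAUS-based, Step 2)
*     - unemp_pop_atappyearlyACS.dta  (ACS-based, Step 3)
* -----------------------------------------------------------------------------------------------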

***********
* Merging DAF PUF and master cohort 
*   master cohort has: fips code, age at Medicare entry, covstart month 
*   DAFPUF has: covstart month, age at Medicare entry, month of application (for 10% sample) 
*   LAUS has: moving average number of unemployed and unemployment rate for previous 12 months in the county X yearmonth 
*   (this yearly script reads LAUS_ctyXyr.dta, which is keyed on fips and year)
* Steps:
*   Step 1 (DAFPUF): for each year of Medicare entry, get the distribution of years of application 
*   Step 2 (BLS & Census): by county and year of Medicare entry, calculate average unemployment rate and population in the years of SSDI filing
*   Step 3 (ACS & Census): same as Step 2, using ACS-based unemployment measures


****************************
* Step 1 (DAFPUF): for each year of Medicare entry, get the distribution of years of application 

* Purpose: count SSDI beneficiaries in each [filing year] x [Medicare coverage start year]
* cell and store the counts in the tempfile filing_covstart_cells, which Steps 2 and 3
* join onto the county-by-year files.
use "$SSDIMed/data/proc/ssa/DAF18/daf18_puf_ann_dmg_sample.dta", clear

* Calendar year of SSDI filing (bdof_puf1 is a daily date) and of Medicare coverage
* start (covstart_month is a monthly date)
gen ssdi_filing_year = yofd(bdof_puf1)
gen covstart_year    = yofd(dofm(covstart_month))

* Unemployment and county population counts start in 1990; drop earlier filing years.
* Missing filing years are not removed by this drop (missing sorts above every number),
* so the assert that follows is what guards against them.
drop if ssdi_filing_year < 1990
assert !missing(ssdi_filing_year, covstart_year)

* How many beneficiaries in each filing year and Medicare coverage start year
gen double ssdi_benes = 1
gcollapse (sum) ssdi_benes, by(ssdi_filing_year covstart_year) fast

* Diagnostic only (nothing below up to the keep is saved):
* what share of each covstart year's beneficiaries filed in a year the ACS covers?
* Step 3 keeps ACS years 2005-2019 and drops filing years before 2005, so the relevant
* condition is ssdi_filing_year >= 2005. The earlier version of this check used > 2005,
* which left out filing year 2005 and understated the covered share.
gegen totalssdi_benes = total(ssdi_benes), by(covstart_year)
gen ssdi_benespct = ssdi_benes / totalssdi_benes
sum ssdi_benespct, d
* Sum the shares over ACS-covered filing years (filled only on rows meeting the condition) ...
gegen ACSssdi_beneshelper = total(ssdi_benespct) if ssdi_filing_year >= 2005, by(covstart_year)
* ... then spread that total to every row of the covstart year
gegen ACSssdi_benes = mean(ACSssdi_beneshelper), by(covstart_year)
tab covstart_year, sum(ACSssdi_benes)
* Author's recorded answer (obtained with the > 2005 condition): covstart_year >= 2007 is usable.
* NOTE(review): Step 3 restricts to covstart_year >= 2008 -- confirm which cutoff is intended.
keep ssdi_filing_year covstart_year ssdi_benes

label var ssdi_filing_year "SSDI year of filing entry 1 = yofd(bdof_puf1)"
label var ssdi_benes "Number of ssdi_benes per filing and Medicare entry year"

* Beneficiary-weighted distribution of the gap (in years) between filing and coverage start
gen diff = covstart_year - ssdi_filing_year
sum diff [aw = ssdi_benes], d
drop diff

* Sort and save; cells are unique on (ssdi_filing_year, covstart_year) after the collapse
compress
sort ssdi_filing_year covstart_year
order ssdi_filing_year covstart_year
tempfile filing_covstart_cells
save `filing_covstart_cells'


****************************
* Step 2 (BLS, Census, Step 1): construct population and unemployment rates for each covstart year, based on conditions in the years when individuals filed for SSDI
* Note: it takes about 1.5 hours to run Step 2, due to the joinby, merge, and gcollapse steps


* Purpose: for every county and Medicare coverage start year, average LAUS unemployment
* and Census population over the SSDI filing years, weighting each filing year by the
* number of beneficiaries from Step 1.

* County-by-year unemployment; must be unique on fips x year and lie within 1990-2019
use "$SSDIMed/data/proc/acs/LAUS_ctyXyr.dta", clear
gisid fips year
assert inrange(year, 1990, 2019)
keep fips year unemp
order fips year

* Rename keys: year becomes the joinby key, fips becomes the merge key used further down
rename (year fips) (ssdi_filing_year fipscounty)

* Attach every covstart year (and its number of filers) to each county x filing year row
joinby ssdi_filing_year using `filing_covstart_cells'
gisid fipscounty covstart_year ssdi_filing_year

* Unemployment and county population counts start in 1990; drop earlier filing years
drop if ssdi_filing_year < 1990

* Add population counts (by age and overall, wide format, one obs per fipscounty year),
* matched on the filing year
gen year = ssdi_filing_year
merge m:1 fipscounty year using "$SSDIMed/data/proc/census/popest/cdc-wonder-county-pop_byage_1990-2019_wide.dta", keep(match master) nogen noreport

* Keep a copy of the uncollapsed county x filing year x covstart year data
saveold "$SSDIMed/data/temp/bls_census_yearly.dta", replace

* QC: every Puerto Rico row (state fips 72) must have missing population counts.
* NOTE(review): this does not check that counties outside PR are non-missing.
local qc_popvars pop18 pop60 pop_all pop_18_60
local qc_is_pr (substr(fipscounty, 1, 2) == "72")
foreach v of local qc_popvars {
  assert missing(`v') if `qc_is_pr'
}

* Collapse to the county x covstart year level, weighting by the number of SSDI
* beneficiaries who filed in each year
gcollapse (mean) unemp pop* [aw = ssdi_benes], by(fipscounty covstart_year) labelformat(#sourcelabel#)

* Same QC after the collapse, echoing each variable name as it is checked
foreach v of local qc_popvars {
  di "`v'"
  assert missing(`v') if `qc_is_pr'
}

* Suffix variables to show they describe conditions in the years of application
rename unemp* unemp*_atapp
rename pop* pop*_atapp

* Sort, confirm one row per county x covstart year, and save
compress
sort fipscounty covstart_year
by fipscounty covstart_year: assert _N == 1
order fipscounty covstart_year
save "$SSDIMed/data/proc/public/unemp_pop_atappyearly.dta", replace

* (end of Step 2)


**************Step 3: same as Step 2 NOW USING ACS MEASURES 

* Purpose: same construction as Step 2, with unemployment rates computed from ACS
* employed and unemployed counts instead of LAUS.

* County-by-year ACS file; must be unique on fips x year
use "$SSDIMed/data/proc/acs/ACS_ctyXyr.dta", clear
gisid fips year

* Restrict to ACS years 2005-2019
drop if !inrange(year, 2005, 2019)
* NOTE(review): this assert can never fail after the restriction above; kept as in the original
assert inrange(year, 1990, 2019)
order fips year

* Unemployment rates in percent: unemployed / (employed + unemployed), overall and
* for the _50 and _HS count variables
gen unemp_nationalACS = 100 * unemployed / (employed + unemployed)
gen unemp_50 = 100 * unemployed_50 / (employed_50 + unemployed_50)
gen unemp_HS = 100 * unemployed_HS / (employed_HS + unemployed_HS)
drop employed* unemployed*

* Rename keys: year becomes the joinby key, fips becomes the merge key used further down
rename (year fips) (ssdi_filing_year fipscounty)

* Attach every covstart year (and its number of filers) to each county x filing year row
joinby ssdi_filing_year using `filing_covstart_cells'

* ACS measures start in 2005; drop earlier filing years
* (no rows should remain to drop, since the ACS file was already restricted to 2005-2019)
drop if ssdi_filing_year < 2005

* Covstart years before 2008 are missing most of their filers in the ACS window,
* so those covstart years are dropped.
* NOTE(review): the Step 1 comment says covstart_year >= 2007 is usable -- confirm the cutoff.
drop if covstart_year < 2008

tab ssdi_filing_year covstart_year, mi
gisid fipscounty covstart_year ssdi_filing_year

* Add population counts (by age and overall, wide format, one obs per fipscounty year),
* matched on the filing year
gen year = ssdi_filing_year
merge m:1 fipscounty year using "$SSDIMed/data/proc/census/popest/cdc-wonder-county-pop_byage_1990-2019_wide.dta", keep(match master) nogen noreport

* Keep a copy of the uncollapsed county x filing year x covstart year data
saveold "$SSDIMed/data/temp/acs_census_yearly.dta", replace

* QC: every Puerto Rico row (state fips 72) must have missing population counts.
* NOTE(review): this does not check that counties outside PR are non-missing.
local qc_popvars pop18 pop60 pop_all pop_18_60
local qc_is_pr (substr(fipscounty, 1, 2) == "72")
foreach v of local qc_popvars {
  assert missing(`v') if `qc_is_pr'
}

* Collapse to the county x covstart year level, weighting by the number of SSDI
* beneficiaries who filed in each year
gcollapse (mean) unemp* pop* [aw = ssdi_benes], by(fipscounty covstart_year) labelformat(#sourcelabel#)

* Same QC after the collapse, echoing each variable name as it is checked
foreach v of local qc_popvars {
  di "`v'"
  assert missing(`v') if `qc_is_pr'
}

* Suffix variables to show they describe conditions in the years of application
rename unemp* unemp*_atapp
rename pop* pop*_atapp

* Sort, confirm one row per county x covstart year, and save
compress
sort fipscounty covstart_year
by fipscounty covstart_year: assert _N == 1
order fipscounty covstart_year
save "$SSDIMed/data/proc/public/unemp_pop_atappyearlyACS.dta", replace



